Django nested serializer prefetching before validation

Hello,

I am having some difficulty with prefetching nested model objects, so the number of repeated queries are causing big performance issues.

I already tried to learn from a couple of blog posts, but I did not succeed. Maybe I am just too simple minded.
example:

models.py

class Project(models.Model):

    project_name = models.CharField(max_length=255)
    status = models.CharField(max_length= 255)
    owner = models.ForeignKey(User, null=True, on_delete=models.CASCADE)
    user_id = models.PositiveIntegerField()

    def __str__(self) -> str:
        return self.project_name
    
class Country(models.Model):
    SUPPORTED = "supported"
    NOT_SUPPORTED = "not_supported"
    SUSPENDED = "suspended"
    STATUS_CHOICES = [
        (SUPPORTED, "Supported"),
        (NOT_SUPPORTED, "Not supported"),
        (SUSPENDED, "Suspended"),
    ]
    
    name = models.CharField(max_length=255)
    iso_code2 = models.CharField(max_length=2, primary_key=True)
    iso_code3 = models.CharField(max_length=3, unique=True)
    status = models.CharField(max_length=255, choices=STATUS_CHOICES, default=NOT_SUPPORTED)

    class Meta:
        verbose_name_plural = "Countries"
        ordering = ["name"]

    def __str__(self) -> str:
        return self.name

class Site(models.Model):
    project = models.ForeignKey(Project, on_delete=models.CASCADE, null=True, related_name="sites")
    site_id1 = models.CharField(max_length=255)
    site_id2 = models.CharField(max_length=255, blank=True)
    country_code = models.ForeignKey(Country, on_delete=models.CASCADE)
    postal_code = models.CharField(max_length=255)
    city = models.CharField(max_length=255)
    address = models.CharField(max_length=512)

serializers.py

class SiteSerializer(serializers.ModelSerializer):
    def validate(self, attrs):
        error_details = {}
        country = attrs.get("country_code")

        if country.status != "supported":
            error_details.setdefault("country", []).append(f"The given country_code: '{country.iso_code2}' ({country.name}) is currently not supported. Current status: {country.status}")
        
        
        if error_details:
            raise serializers.ValidationError(detail=error_details, code="config_error")
        return super().validate(attrs)
    
    class Meta:
        model = Site
        
        fields = [
            "site_id1",
            "site_id2",
            "country_code",
            "city",
            "postal_code",
            "address",
        ]



class FileProjectSerializer(serializers.ModelSerializer):
    sites = SiteSerializer(many=True, min_length= 1, max_length= 3000, required=True)

    class Meta:
        model = Project

        fields = [
            "project_name",
            "status",
            "sites",
            "owner",
            "user_id",
        ]

        read_only_fields = [
            "owner",
            "status",
        ]
    
    def create(self, validated_data):
        sites = validated_data.pop("sites",[])
        project = Project.objects.create(**validated_data)
        for site in sites:
            Site.objects.create(project=project, **site)
        return project

views.py

class FileUploadView(generics.CreateAPIView):
    parser_classes = [FormParser, MultiPartParser, FileUploadParser]
    permission_classes = [permissions.IsAuthenticated]
    serializer_class = FileProjectSerializer
        
    def perform_create(self, serializer):
        return serializer.save()
    
    def create(self, request, *args, **kwargs):
        file_obj = request.FILES.get("file", None)
        if file_obj is None:
            raise ParseError(detail=f"No file found.")
        
        #file converting I did not include these since it is only conversion to dict with pandas
        sites = self.get_data_from_file_as_dict(file_obj)
        sites = self.translate_file_keys_to_model(sites)

        project_name = file_obj.name.replace(".xlsx", "").replace(".csv","")
        project_name = project_name.encode("ascii", "replace").decode()
        #preparing data for serializer (user_id will be sent via request, now its just static)
        project = {
            "project_name": project_name,
            "sites": sites,
            "user_id": 2345678543
        }
        #serializing
        serializer = self.get_serializer(data=project)
        
        serializer.is_valid(raise_exception=True)
        
        instance = self.perform_create(serializer)
        
        response_data = serializer.data
        
        response = {
            "project_name": response_data["project_name"],
            "sites": response_data["sites"]
        }

        #deleting the instance because it is only a file upload which returns a validated json with additional default values
        #maybe there is a better solution, but that might be another topic
        instance.delete()
        
        return Response(data= response, status=status.HTTP_200_OK)

Silky results



image

Any help would be appreciated.

Hi,

I couldn’t see your “get_queryset” method, would you add it ?

one improvement i can see so far is using bulk create in your serializer, have you tried it ?

Hello Daniel,

I have not included it into my viewset, because after multiple tests I realised that it is not even called in case of post method.

I also looked after this on drf-docs.
According to drf-docs, it is used for list views and retrieve views.
Django rest framework docs

On stackoverflow I found the following:
Stackoverflow: do we need query set in CreateAPIView?

If I remember correctly I had something as following, but scrapped it after realised it is not even called…

def get_queryset(self):
        queryset = Project.objects.prefetch_related(
            "sites",
            "sites__country_code"
        )
        return queryset

“one improvement i can see so far is using bulk create in your serializer, have you tried it ?”

Regarding the bulk create method there is a slight change in my actual models. The nesting does not stop at Site model, but I did not include all the submodels, so it would be easier to understand.