I’m new to Django, and am having huge difficulties with performance due to an apparent N+1 problem. I’m having an issue where I get identical SELECT queries to identical related models hundreds of times. I have tried using Prefetch, prefetch_related (for many-to-many), and select_reated, and caching the related entities, but to no avail.
In the following code, when looking at the admin view for one Organization, with hundreds of related publication, I get 100s of related SELECT calls to trends_eventtype and trends_publicationtype, such as
SELECT "trends_publicationtype"."id", "trends_publicationtype"."name" FROM "trends_publicationtype" WHERE "trends_publicationtype"."id" = 1 LIMIT 21; args=(1,); alias=default
And -
SELECT "trends_eventtype"."id", "trends_eventtype"."name" FROM "trends_eventtype" WHERE "trends_eventtype"."id" = 1 LIMIT 21; args=(1,); alias=default
In this test “trends_eventtype”.“id” is always 1 and similarly for publication_type
Currently, I’m working on my admin page:
from django.contrib import admin
from django.core.cache import cache
from django.db.models import Prefetch
from .models import (
TrendData, Term, TrendTimeSeriesData, Point, Batch, Category, Location,
Person, EventType, Event, PublicationType, Organization, Publication,
EventPublication, PublicationPerson, PublicationOrganization, EventPerson,
EventOrganization, EventLocation, Settings, Platform, Filter, Expertise, Source, Value, Attribute, EventAttribute,
PublicationAttribute
)
from .forms import DynamicEventAdminForm, DynamicPublicationAdminForm
class CachedModelAdmin(admin.ModelAdmin):
list_select_related = True
def get_queryset(self, request):
cache_key = f'admin_{self.model._meta.model_name}_queryset'
queryset = cache.get(cache_key)
if not queryset:
queryset = super().get_queryset(request).select_related()
cache.set(cache_key, queryset, timeout=60 * 15)
return queryset
class PublicationPersonInline(admin.TabularInline):
model = PublicationPerson
extra = 1
raw_id_fields = ('person',)
class PublicationOrganizationInline(admin.TabularInline):
model = PublicationOrganization
extra = 1
raw_id_fields = ('organization',)
def get_queryset(self, request):
qs = super().get_queryset(request)
publication_types_cache = cache.get('publication_types_cache', {})
for po in qs:
po.publication.publication_type = publication_types_cache.get(po.publication.publication_type_id, po.publication.publication_type)
return qs
class EventPublicationInline(admin.TabularInline):
model = EventPublication
extra = 1
raw_id_fields = ('publication',)
class EventPersonInline(admin.TabularInline):
model = EventPerson
extra = 1
raw_id_fields = ('person',)
class EventOrganizationInline(admin.TabularInline):
model = EventOrganization
extra = 1
raw_id_fields = ('organization',)
def get_queryset(self, request):
qs = super().get_queryset(request)
event_types_cache = cache.get('event_types_cache', {})
for eo in qs:
eo.event.event_type = event_types_cache.get(eo.event.event_type_id, eo.event.event_type)
return qs
class EventLocationInline(admin.TabularInline):
model = EventLocation
extra = 1
raw_id_fields = ('location',)
class EventAttributeInline(admin.TabularInline):
model = EventAttribute
extra = 1
class PublicationAttributeInline(admin.TabularInline):
model = PublicationAttribute
extra = 1
class EventAdmin(CachedModelAdmin):
form = DynamicEventAdminForm
inlines = [EventPublicationInline, EventPersonInline, EventOrganizationInline, EventLocationInline]
list_display = ('name', 'event_type', 'start_date', 'end_date')
raw_id_fields = ('category', 'filters', 'batch')
autocomplete_fields = ('event_type',)
change_form_template = 'admin/trends/event/change_form.html'
def get_queryset(self, request):
queryset = super().get_queryset(request).select_related('category', 'batch', 'event_type').only(
'id', 'name', 'event_type_id', 'start_date', 'end_date', 'category__id', 'batch__id'
)
# Prefetch related objects efficiently
return queryset.prefetch_related(
Prefetch('eventpublication_set', queryset=EventPublication.objects.select_related('publication')),
Prefetch('eventperson_set', queryset=EventPerson.objects.select_related('person')),
Prefetch('eventorganization_set', queryset=EventOrganization.objects.select_related('organization')),
Prefetch('eventlocation_set', queryset=EventLocation.objects.select_related('location')),
Prefetch('value_set', queryset=Value.objects.select_related('attribute').only('id', 'event', 'attribute', 'value_type', 'text_value', 'integer_value', 'boolean_value', 'float_value'))
)
class PublicationAdmin(CachedModelAdmin):
form = DynamicPublicationAdminForm
inlines = [PublicationPersonInline, PublicationOrganizationInline, EventPublicationInline]
list_display = ('name', 'publication_type', 'point', 'event')
raw_id_fields = ('authors', 'locations', 'publishers', 'point', 'event', 'batch')
autocomplete_fields = ('publication_type',)
def get_queryset(self, request):
queryset = super().get_queryset(request).select_related('point', 'event', 'batch', 'publication_type').only(
'id', 'name', 'publication_type_id', 'point__id', 'event__id', 'batch__id'
)
publication_types_cache = cache.get('publication_types_cache', {})
for publication in queryset:
publication.publication_type = publication_types_cache.get(publication.publication_type_id, publication.publication_type)
publication_people = PublicationPerson.objects.select_related('person')
publication_organizations = PublicationOrganization.objects.select_related('organization')
event_publications = EventPublication.objects.select_related('publication')
return queryset.prefetch_related(
Prefetch('publicationperson_set', queryset=publication_people),
Prefetch('publicationorganization_set', queryset=publication_organizations),
Prefetch('eventpublication_set', queryset=event_publications),
Prefetch('value_set', queryset=Value.objects.select_related('attribute'))
)
class EventTypeAdmin(admin.ModelAdmin):
inlines = [EventAttributeInline]
list_display = ('name',)
search_fields = ('name',)
class PublicationTypeAdmin(admin.ModelAdmin):
inlines = [PublicationAttributeInline]
list_display = ('name',)
search_fields = ('name',)
class PersonAdmin(CachedModelAdmin):
inlines = [PublicationPersonInline, EventPersonInline]
raw_id_fields = ('organizations', 'batch')
search_fields = ('name',)
def get_queryset(self, request):
queryset = super().get_queryset(request).only('id', 'name', 'organizations', 'batch')
publication_people = PublicationPerson.objects.select_related('publication')
event_people = EventPerson.objects.select_related('event')
return queryset.prefetch_related(
Prefetch('publicationperson_set', queryset=publication_people),
Prefetch('eventperson_set', queryset=event_people)
)
class OrganizationAdmin(CachedModelAdmin):
inlines = [PublicationOrganizationInline, EventOrganizationInline]
raw_id_fields = ('persons', 'batch')
search_fields = ('name',)
def get_queryset(self, request):
queryset = super().get_queryset(request).select_related('batch').only(
'id', 'name', 'batch__id'
)
# Cache EventType and PublicationType to avoid redundant queries
event_types_cache = cache.get_or_set(
'event_types_cache',
{event_type.id: event_type for event_type in EventType.objects.all()},
timeout=60*60
)
publication_types_cache = cache.get_or_set(
'publication_types_cache',
{publication_type.id: publication_type for publication_type in PublicationType.objects.all()},
timeout=60*60
)
publication_organizations = PublicationOrganization.objects.select_related(
'publication__publication_type').only(
'id', 'organization', 'publication__id', 'publication__name', 'publication__publication_type__id',
'publication__publication_type__name'
)
event_organizations = EventOrganization.objects.select_related('event__event_type').only(
'id', 'organization', 'event__id', 'event__name', 'event__event_type__id', 'event__event_type__name'
)
for organization in queryset:
for pub_org in organization.publicationorganization_set.all():
pub_org.publication.publication_type = publication_types_cache.get(
pub_org.publication.publication_type_id)
for event_org in organization.eventorganization_set.all():
event_org.event.event_type = event_types_cache.get(event_org.event.event_type_id)
return queryset.prefetch_related(
Prefetch('publicationorganization_set', queryset=publication_organizations),
Prefetch('eventorganization_set', queryset=event_organizations)
)
admin.site.register(Event, EventAdmin)
admin.site.register(Publication, PublicationAdmin)
admin.site.register(EventType, EventTypeAdmin)
admin.site.register(PublicationType, PublicationTypeAdmin)
admin.site.register(Person, PersonAdmin)
admin.site.register(Organization, OrganizationAdmin)
admin.site.register(TrendData)
admin.site.register(Term)
admin.site.register(TrendTimeSeriesData)
admin.site.register(Point)
admin.site.register(Batch)
admin.site.register(Category)
admin.site.register(Location)
admin.site.register(Settings)
admin.site.register(Platform)
admin.site.register(Filter)
admin.site.register(Expertise)
admin.site.register(Source)
admin.site.register(Attribute)
admin.site.register(Value)
Here are my models:
from django.db import models
from django.contrib.contenttypes.models import ContentType
from django.core.cache import cache
from django.core.exceptions import ValidationError
class BaseModel(models.Model):
created_at = models.DateTimeField(auto_now_add=True, db_index=True)
updated_at = models.DateTimeField(auto_now=True, db_index=True)
class Meta:
abstract = True
class Attribute(models.Model):
TEXT = 'text'
INTEGER = 'integer'
BOOLEAN = 'boolean'
FLOAT = 'float'
VALUE_TYPE_CHOICES = [
(TEXT, 'Text'),
(INTEGER, 'Integer'),
(BOOLEAN, 'Boolean'),
(FLOAT, 'Float'),
]
name = models.CharField(max_length=255, db_index=True)
value_type = models.CharField(max_length=10, choices=VALUE_TYPE_CHOICES, db_index=True)
def __str__(self):
return self.name
class EventType(models.Model):
name = models.CharField(max_length=255, db_index=True)
def __str__(self):
return self.name
@staticmethod
def get_cached_instance(event_type_id):
cache_key = f'event_type_{event_type_id}'
event_type = cache.get(cache_key)
if not event_type:
event_type = EventType.objects.get(pk=event_type_id)
cache.set(cache_key, event_type, timeout=60 * 60)
return event_type
def get_attributes(self):
return Attribute.objects.filter(eventattribute__event_type=self)
class PublicationType(models.Model):
name = models.CharField(max_length=255, db_index=True)
def __str__(self):
return self.name
@staticmethod
def get_cached_instance(publication_type_id):
cache_key = f'publication_type_{publication_type_id}'
publication_type = cache.get(cache_key)
if not publication_type:
publication_type = PublicationType.objects.get(pk=publication_type_id)
cache.set(cache_key, publication_type, timeout=60 * 60)
return publication_type
def get_attributes(self):
return Attribute.objects.filter(publicationattribute__publication_type=self)
class EventAttribute(models.Model):
event_type = models.ForeignKey(EventType, on_delete=models.CASCADE, db_index=True)
attribute = models.ForeignKey(Attribute, on_delete=models.CASCADE, db_index=True)
class Meta:
unique_together = ('event_type', 'attribute')
class PublicationAttribute(models.Model):
publication_type = models.ForeignKey(PublicationType, on_delete=models.CASCADE, db_index=True)
attribute = models.ForeignKey(Attribute, on_delete=models.CASCADE, db_index=True)
class Meta:
unique_together = ('publication_type', 'attribute')
class Value(models.Model):
TEXT = 'text'
INTEGER = 'integer'
BOOLEAN = 'boolean'
FLOAT = 'float'
VALUE_TYPE_CHOICES = [
(TEXT, 'Text'),
(INTEGER, 'Integer'),
(BOOLEAN, 'Boolean'),
(FLOAT, 'Float'),
]
event = models.ForeignKey('Event', on_delete=models.CASCADE, null=True, blank=True, db_index=True)
publication = models.ForeignKey('Publication', on_delete=models.CASCADE, null=True, blank=True, db_index=True)
attribute = models.ForeignKey(Attribute, on_delete=models.CASCADE, db_index=True)
value_type = models.CharField(max_length=10, choices=VALUE_TYPE_CHOICES, db_index=True)
text_value = models.CharField(max_length=5000, blank=True, null=True)
integer_value = models.IntegerField(blank=True, null=True)
boolean_value = models.BooleanField(blank=True, null=True)
float_value = models.FloatField(blank=True, null=True)
class Meta:
unique_together = ('event', 'attribute', 'publication')
def __str__(self):
if self.event:
return f"{self.event.name} - {self.attribute.name}: {self.get_value()}"
if self.publication:
return f"{self.publication.name} - {self.attribute.name}: {self.get_value()}"
def get_value(self):
if self.value_type == self.TEXT:
return self.text_value
elif self.value_type == self.INTEGER:
return self.integer_value
elif self.value_type == self.BOOLEAN:
return self.boolean_value
elif self.value_type == self.FLOAT:
return self.float_value
class Event(BaseModel):
GRANULARITY_CHOICES = [
('year', 'Yearly'),
('month', 'Monthly'),
('day', 'Daily'),
('season', 'Seasonal'),
('custom', 'Custom'),
]
DATE_TYPE_CHOICES = [
('date_of_birth', 'Date of Birth'),
('date_of_death', 'Date of Death'),
('date_from', 'Date From'),
('date_to', 'Date To'),
]
event_type = models.ForeignKey('EventType', on_delete=models.CASCADE, db_index=True)
name = models.CharField(max_length=2000, null=True, blank=True, db_index=True)
category = models.ForeignKey('Category', on_delete=models.CASCADE, related_name='events', null=True, blank=True, db_index=True)
filters = models.ManyToManyField('Category', related_name='filtered_events')
start_date = models.DateField(null=True, blank=True, db_index=True)
end_date = models.DateField(null=True, blank=True, db_index=True)
start_granularity = models.CharField(max_length=10, choices=GRANULARITY_CHOICES, default='day', db_index=True)
end_granularity = models.CharField(max_length=10, choices=GRANULARITY_CHOICES, default='day', db_index=True)
custom_start_date = models.DateField(null=True, blank=True, db_index=True)
custom_end_date = models.DateField(null=True, blank=True, db_index=True)
custom_date_display = models.CharField(max_length=100, null=True, blank=True)
date_type = models.CharField(max_length=50, choices=DATE_TYPE_CHOICES, null=True, blank=True, db_index=True)
batch = models.ForeignKey('Batch', on_delete=models.CASCADE, null=True, blank=True, db_index=True)
def __str__(self):
return f"{self.name} ({self.event_type.name})"
def get_attributes(self):
return self.event_type.get_attributes()
def get_attribute_value(self, attribute_name):
attribute = self.get_attributes().filter(name=attribute_name).first()
if attribute:
return Value.objects.filter(event=self, attribute=attribute).first()
return None
class Publication(BaseModel):
publication_type = models.ForeignKey('PublicationType', on_delete=models.CASCADE, db_index=True)
name = models.CharField(max_length=1000, db_index=True)
authors = models.ManyToManyField('Person', through='PublicationPerson', related_name='publications')
locations = models.ManyToManyField('Location')
publishers = models.ManyToManyField('Organization', through='PublicationOrganization', related_name='publications')
point = models.ForeignKey('Point', on_delete=models.SET_NULL, null=True, blank=True, db_index=True)
event = models.ForeignKey('Event', on_delete=models.SET_NULL, null=True, blank=True, db_index=True)
batch = models.ForeignKey('Batch', on_delete=models.CASCADE, null=True, blank=True, db_index=True)
full_text = models.TextField(null=True, blank=True)
def __str__(self):
return f"{self.publication_type.name}: {self.name}"
def clean(self):
if self.point and self.event:
raise ValidationError("A publication cannot be linked to both a point and an event.")
elif not self.point and not self.event:
raise ValidationError("A publication must be linked to either a point or an event.")
def get_attributes(self):
return self.publication_type.get_attributes()
def get_attribute_value(self, attribute_name):
attribute = self.get_attributes().filter(name=attribute_name).first()
if attribute:
return Value.objects.filter(publication=self, attribute=attribute).first()
return None
class TrendData(BaseModel):
GRANULARITY_CHOICES = [
('D', 'Daily'),
('W', 'Weekly'),
('M', 'Monthly'),
('Y', 'Yearly'),
]
term = models.CharField(max_length=1000, db_index=True)
granularity = models.CharField(max_length=1, choices=GRANULARITY_CHOICES, db_index=True)
peak_date = models.DateField(null=True, blank=True, db_index=True)
peak_value = models.IntegerField(null=True, blank=True, db_index=True)
def __str__(self):
return f"{self.term} ({self.get_granularity_display()}) - {self.peak_date}: {self.peak_value}"
class Term(BaseModel):
term = models.CharField(max_length=1000, db_index=True)
def __str__(self):
return self.term
class TrendTimeSeriesData(BaseModel):
trend = models.ForeignKey('TrendData', related_name='time_series', on_delete=models.CASCADE, db_index=True)
date = models.DateField(db_index=True)
value = models.IntegerField(db_index=True)
def __str__(self):
return f"{self.trend.term} ({self.date}): {self.value}"
class Point(BaseModel):
GRANULARITY_CHOICES = [
('D', 'Daily'),
('W', 'Weekly'),
('M', 'Monthly'),
('Y', 'Yearly'),
]
terms = models.ManyToManyField(Term)
granularity = models.CharField(max_length=1, choices=GRANULARITY_CHOICES, default='D', db_index=True)
date = models.DateField(db_index=True)
start_date = models.DateField(db_index=True)
end_date = models.DateField(db_index=True)
smoothing = models.CharField(max_length=100, blank=True)
value = models.FloatField(null=True, blank=True, db_index=True)
def __str__(self):
return f"{', '.join(str(term) for term in self.terms.all())} - {self.date} ({self.get_granularity_display()})"
class Batch(BaseModel):
name = models.CharField(max_length=255, unique=True, db_index=True)
status = models.CharField(max_length=50, db_index=True)
def __str__(self):
return self.name
class Category(BaseModel):
name = models.CharField(max_length=255, db_index=True)
def __str__(self):
return self.name
class Location(BaseModel):
address = models.CharField(max_length=255, db_index=True)
latitude = models.FloatField(null=True, blank=True, db_index=True)
longitude = models.FloatField(null=True, blank=True, db_index=True)
batch = models.ForeignKey('Batch', on_delete=models.CASCADE, null=True, blank=True, db_index=True)
def __str__(self):
return self.address
class Person(BaseModel):
name = models.CharField(max_length=500, db_index=True)
title = models.CharField(max_length=255, null=True, blank=True, db_index=True)
biography = models.TextField(null=True, blank=True)
biography_link = models.URLField(null=True, blank=True)
photo_original_link = models.URLField(null=True, blank=True)
photo_local_link = models.URLField(null=True, blank=True)
photo_alt_text = models.CharField(max_length=255, null=True, blank=True)
photo_rights = models.CharField(max_length=255, null=True, blank=True)
birth_gender = models.CharField(max_length=50, null=True, blank=True, db_index=True)
current_gender = models.CharField(max_length=50, null=True, blank=True, db_index=True)
pronouns = models.CharField(max_length=50, null=True, blank=True, db_index=True)
ethnicity = models.CharField(max_length=50, null=True, blank=True, db_index=True)
country_of_birth = models.CharField(max_length=100, null=True, blank=True, db_index=True)
handled = models.BooleanField(default=False, db_index=True)
batch = models.ForeignKey('Batch', on_delete=models.CASCADE, null=True, blank=True, db_index=True)
filters = models.ManyToManyField('Filter', related_name='people')
organizations = models.ManyToManyField('Organization', through='PersonOrganization', related_name='members')
def __str__(self):
return self.name
class Organization(BaseModel):
name = models.CharField(max_length=1000, db_index=True)
abbreviation = models.CharField(max_length=255, blank=True)
description = models.TextField(blank=True, null=True)
batch = models.ForeignKey('Batch', on_delete=models.CASCADE, null=True, blank=True, db_index=True)
persons = models.ManyToManyField('Person', through='PersonOrganization', related_name='affiliations')
def __str__(self):
return self.name
class EventPublication(models.Model):
event = models.ForeignKey('Event', on_delete=models.CASCADE, db_index=True)
publication = models.ForeignKey('Publication', on_delete=models.CASCADE, db_index=True)
relationshipType = models.CharField(max_length=100, db_index=True)
class PublicationPerson(models.Model):
publication = models.ForeignKey('Publication', on_delete=models.CASCADE, db_index=True)
person = models.ForeignKey('Person', on_delete=models.CASCADE, db_index=True)
role = models.CharField(max_length=100, db_index=True)
class PublicationOrganization(models.Model):
publication = models.ForeignKey('Publication', on_delete=models.CASCADE, db_index=True)
organization = models.ForeignKey('Organization', on_delete=models.CASCADE, db_index=True)
role = models.CharField(max_length=100, db_index=True)
class PersonOrganization(models.Model):
person = models.ForeignKey('Person', on_delete=models.CASCADE, db_index=True)
organization = models.ForeignKey('Organization', on_delete=models.CASCADE, db_index=True)
role = models.CharField(max_length=100, null=True, blank=True, db_index=True)
class Meta:
unique_together = ('person', 'organization')
def __str__(self):
return f"{self.person.name} - {self.organization.name} ({self.role})"
class EventPerson(models.Model):
event = models.ForeignKey('Event', on_delete=models.CASCADE, db_index=True)
person = models.ForeignKey('Person', on_delete=models.CASCADE, db_index=True)
role = models.CharField(max_length=255, null=True, blank=True, db_index=True)
class EventOrganization(models.Model):
event = models.ForeignKey('Event', on_delete=models.CASCADE, db_index=True)
organization = models.ForeignKey('Organization', on_delete=models.CASCADE, db_index=True)
role = models.CharField(max_length=100, db_index=True)
class EventLocation(models.Model):
event = models.ForeignKey('Event', on_delete=models.CASCADE, db_index=True)
location = models.ForeignKey('Location', on_delete=models.CASCADE, db_index=True)
relationshipType = models.CharField(max_length=100, db_index=True)
class Settings(models.Model):
proxies = models.TextField(help_text="Enter each proxy on a new line")
def get_proxies_list(self):
return self.proxies.splitlines()
class Platform(BaseModel):
name = models.CharField(max_length=255, db_index=True)
batch = models.ForeignKey('Batch', on_delete=models.CASCADE, null=True, blank=True, db_index=True)
def __str__(self):
return self.name
class Filter(BaseModel):
name = models.CharField(max_length=255, db_index=True)
def __str__(self):
return self.name
class Expertise(BaseModel):
name = models.CharField(max_length=255, db_index=True)
people = models.ManyToManyField(Person, related_name='expertises')
def __str__(self):
return self.name
class Source(BaseModel):
zotero_item_id = models.CharField(max_length=10, db_index=True)
is_primary = models.BooleanField(default=False, db_index=True)
sourceable_id = models.IntegerField(db_index=True)
sourceable_type = models.ForeignKey(ContentType, on_delete=models.CASCADE, db_index=True)
formatted_citation = models.TextField(max_length=1000, null=True, blank=True)
page_numbers = models.CharField(max_length=20, null=True, blank=True)
batch = models.ForeignKey('Batch', on_delete=models.CASCADE, null=True, blank=True, db_index=True)