here’s (simplistic) bench done on sqlite (couldnt be bothered to do it elsewhere…)
models.py
from django.db import models
class Person(models.Model):
name = models.CharField('Name', max_length=128, null=True, blank=True)
parent = models.ForeignKey('self', null=True, blank=True, on_delete=models.CASCADE)
def __str__(self):
return self.name
def get_descendants(self):
descendants = Person.objects.raw('''
WITH RECURSIVE descendants(id, name, breadcrumb) AS (
SELECT id, name, CAST(name AS CHAR(100))
FROM person_person
WHERE id=%s
UNION ALL
-- pg/my
-- SELECT pp.id, pp.name, CONCAT(d.breadcrumb,' / ',pp.name)
-- for sqlite
SELECT pp.id, pp.name, d.breadcrumb || ' / ' || pp.name
FROM descendants d
JOIN person_person pp ON d.id=pp.parent_id)
SELECT * FROM descendants;
''', [
self.id
])
return descendants
management/commands/person_populate.py
import random
import string
from django.utils import timezone
from django.core.management.base import BaseCommand
from django.conf import settings
from person.models import Person
def random_string_generator(str_size, allowed_chars):
return ''.join(random.choice(allowed_chars) for x in range(str_size))
# chars = string.ascii_letters + string.punctuation
chars = string.ascii_letters
size = 12
class Command(BaseCommand):
help = '''
bleh
'''
def handle(self, *args, **options):
person_count = 25000
Person.objects.all().delete()
l = [Person(
name=random_string_generator(size, chars),
) for i in range(person_count)]
print(l)
persons = Person.objects.bulk_create(l)
print(Person.objects.count())
# then aassign random parents...
for person in Person.objects.all():
person.parent = Person.objects.all().order_by('?').first()
person.save(update_fields=['parent'])
print(f'Done for person {person.pk}')
management/commands/person_get_descendants.py
import random
import string
from time import time
from django.utils import timezone
from django.core.management.base import BaseCommand
from django.conf import settings
from person.models import Person
class Command(BaseCommand):
help = '''
bleh
'''
def handle(self, *args, **options):
person = Person.objects.all().order_by('?').first()
print(person)
s = time()
descendants = person.get_descendants()
for descendant in descendants:
print(descendant.__dict__)
e = time()
print(f'Descendants fetched in {e-s}')
25k rows on sqlite (reasonably powerful laptop), with outputs similar to the following:
$ python manage.py person_get_descendants
ThTJzvZeltYb
{’_state’: <django.db.models.base.ModelState object at 0x000001D36DD7A3A0>, ‘id’: 123256, ‘name’: ‘ThTJzvZeltYb’, ‘breadcrumb’: ‘ThTJzvZeltYb’}
{’_state’: <django.db.models.base.ModelState object at 0x000001D36DD7A1F0>, ‘id’: 110862, ‘name’: ‘wvuXIfyxINTz’, ‘breadcrumb’: ‘ThTJzvZeltYb / wvuXIfyxINTz’}
Descendants fetched in 0.0009772777557373047
some huge lists sometimes (its completely random after all), but then I suspect the print() actually takes more time than the query itself. Again, can’t be bothered to measure at db level.
It seems fast enough…?